This is a Python notebook. This is the notebook you'll use to run and create the analysis code on pickle data sets. Pickle files are created and premanufactured from ROOT files from MicroBooNE LAr experiment.
You should have access to: example_neutrino.ipynb, neutrino_function.py, data folder. You are free to modify neutrino_function.py or create your own plotting functions.
IMPORTANT: It is strongly recommended that only one student of a lab pair should edit this notebook and the files contained within the server directories. This is because both students cannot see the same live edit of the notebook or files at the same time, and it is easy to accidently overwrite each other.
The basic libraries, you may import more if there are present on the server's environment
import numpy as np
import uproot3
import pickle
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import Neutrino_functions
from math import *
import scipy as sci
import scipy.optimize
# MACHINE LEARNING IMPORTS
#import sklearn
#from sklearn import tree
#from sklearn.model_selection import train_test_split
#from sklearn import metrics
#from sklearn.ensemble import RandomForestClassifier
#from sklearn.metrics import confusion_matrix
#from sklearn.metrics import ConfusionMatrixDisplay
# MC
MC_file = 'MC_EXT_flattened.pkl'
# Data
data_file = 'data_flattened.pkl'
# Open file as pandas dataframe
MC_EXT = pd.read_pickle(MC_file)
data = pd.read_pickle(data_file)
# removing 'Subevent' from data
MC_EXT = MC_EXT.drop('Subevent', axis = 1)
data = data.drop('Subevent', axis = 1)
# Uncomment these lines to display the dataframes
pd.set_option('display.max_columns', 100)
# MicroBooNE data
data.head(10)
| _closestNuCosmicDist | trk_len_v | trk_distance_v | category | topological_score | trk_sce_end_z_v | trk_sce_end_y_v | trk_sce_end_x_v | trk_score_v | trk_llr_pid_score_v | trk_sce_start_z_v | trk_sce_start_y_v | trk_sce_start_x_v | reco_nu_vtx_sce_x | reco_nu_vtx_sce_y | reco_nu_vtx_sce_z | trk_energy_tot | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 48.170196 | 230.178085 | 0.289455 | 0 | 0.932121 | 662.713745 | -10.419758 | 0.564317 | 1.0 | 0.981798 | 443.558472 | -71.411057 | 32.10228 | 32.137272 | -71.675980 | 443.439148 | 10000.021584 |
| 1 | 48.170196 | 19.862617 | 20.065849 | 0 | 0.932121 | 467.605438 | -41.015533 | 40.286861 | 0.460725 | 0.722245 | 455.0065 | -55.723381 | 36.461731 | 32.137272 | -71.675980 | 443.439148 | 10000.021584 |
| 3 | 177.083498 | 174.338699 | 0.057629 | 0 | 0.588847 | 978.765259 | 9.115969 | 153.437668 | 0.999995 | 0.970214 | 852.828674 | -36.029785 | 42.856102 | 42.869896 | -35.978130 | 852.848938 | 0.629191 |
| 10 | 0.067737 | 264.553223 | 196.515564 | 0 | 0.002079 | 998.799072 | 18.552534 | 225.164139 | 1.0 | 0.977688 | 797.282776 | 63.213791 | 63.001648 | 160.463943 | -113.297066 | 772.441833 | 10000.778217 |
| 11 | 36.361293 | 493.096283 | 0.465464 | 0 | 0.983048 | 865.795166 | -56.678547 | 80.313004 | 1.0 | 0.990403 | 408.639801 | 96.316406 | 141.032898 | 141.039246 | 96.385994 | 408.178772 | 1.296849 |
| 13 | 127.613429 | 181.327194 | 0.272344 | 0 | 0.021950 | 264.979065 | 92.158607 | 255.202988 | 0.999972 | 0.958479 | 230.559982 | -81.870941 | 221.46637 | 221.349503 | -81.868439 | 230.311829 | 9999.794868 |
| 14 | 127.613429 | 10.863928 | 0.334015 | 0 | 0.021950 | 226.903671 | -91.896515 | 218.480057 | 0.791141 | -0.287616 | 229.998383 | -81.993217 | 221.4086 | 221.349503 | -81.868439 | 230.311829 | 9999.794868 |
| 16 | 101.562292 | 8.869122 | 265.905823 | 0 | 0.036152 | 110.97657 | 81.228905 | 30.3652 | 0.011572 | 0.133134 | 114.274597 | 86.827835 | 36.310745 | 170.431870 | 59.736755 | 340.503021 | 9999.221455 |
| 19 | 181.173178 | 168.925873 | 0.349405 | 0 | 0.517549 | 517.184326 | 67.922836 | 156.103348 | 0.999998 | 0.959285 | 406.7724 | 114.33268 | 40.269585 | 40.016575 | 114.483635 | 406.585693 | 9999.828041 |
| 37 | 162.177990 | 47.523987 | 0.030297 | 0 | 0.336266 | 892.599304 | -59.251465 | 37.77512 | 0.93185 | 0.761358 | 870.96521 | -33.337875 | 71.00219 | 71.004707 | -33.363049 | 870.948181 | 0.286233 |
# Monte Carlo Simulation data
MC_EXT.head(10)
| _closestNuCosmicDist | trk_len_v | trk_distance_v | category | topological_score | trk_sce_end_z_v | trk_sce_end_y_v | trk_sce_end_x_v | trk_score_v | trk_llr_pid_score_v | trk_sce_start_z_v | trk_sce_start_y_v | trk_sce_start_x_v | reco_nu_vtx_sce_x | reco_nu_vtx_sce_y | reco_nu_vtx_sce_z | trk_energy_tot | weight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 124.478148 | 225.284348 | 1.286398 | 21 | 0.994485 | 510.146088 | -19.997118 | 191.864334 | 1.0 | 0.977081 | 343.433655 | 35.865448 | 54.776821 | 53.900658 | 36.203041 | 342.578735 | 1.164239 | 0.000002 |
| 1 | 124.478148 | 7.850903 | 131.947891 | 21 | 0.994485 | 389.239197 | 46.669083 | 183.160797 | 0.041434 | 0.486446 | 382.998871 | 47.109879 | 178.486572 | 53.900658 | 36.203041 | 342.578735 | 1.164239 | 0.000002 |
| 2 | 141.086923 | 251.017548 | 0.025229 | 5 | 0.007706 | 766.055969 | -50.159794 | 172.77446 | 1.0 | 0.972468 | 658.480286 | 117.427391 | 20.797407 | 20.804905 | 117.408989 | 658.495789 | 9999.989363 | 0.158957 |
| 3 | 10.511966 | 58.736591 | 10.511966 | 4 | 0.066952 | 213.629105 | 117.414757 | 88.746597 | 0.929871 | 0.870984 | 224.018387 | 60.914005 | 78.242538 | 78.443840 | 50.530334 | 223.597870 | 9999.441230 | 0.192390 |
| 4 | 10.511966 | 9.962337 | 3.888895 | 4 | 0.066952 | 235.423004 | 46.921162 | 80.265305 | 0.372258 | 0.404711 | 226.829147 | 51.903919 | 80.192444 | 78.443840 | 50.530334 | 223.597870 | 9999.441230 | 0.192390 |
| 5 | 147.929810 | 289.265442 | 0.152002 | 5 | 0.515178 | 1031.704712 | 52.0289 | 83.527153 | 1.0 | 0.9848 | 761.951172 | -6.10771 | 1.388844 | 1.357146 | -6.141214 | 761.806335 | 9999.934871 | 0.986006 |
| 6 | 96.691013 | 56.727428 | 27.846855 | 21 | 0.998477 | 1036.508911 | -38.234692 | 19.931959 | 0.065278 | 0.902933 | 992.340698 | -65.519211 | 22.75659 | 25.337120 | -80.776207 | 969.163696 | 10000.275867 | 0.158957 |
| 7 | 96.691013 | 67.758522 | 0.264366 | 21 | 0.998477 | 1036.692261 | -78.989571 | 24.593576 | 0.966078 | 0.940751 | 969.034668 | -81.002808 | 25.39159 | 25.337120 | -80.776207 | 969.163696 | 10000.275867 | 0.158957 |
| 8 | 96.691013 | 25.262609 | 7.398347 | 21 | 0.998477 | 987.487305 | -65.503288 | 5.5114 | 0.082593 | 0.79802 | 972.884705 | -76.234444 | 20.853903 | 25.337120 | -80.776207 | 969.163696 | 10000.275867 | 0.158957 |
| 9 | 106.679589 | 73.540779 | 0.226748 | 5 | 0.040650 | 769.010193 | 110.767021 | 45.539276 | 0.950805 | 0.911611 | 759.31604 | 53.225922 | 1.124355 | 1.250130 | 53.412140 | 759.350220 | 9999.371950 | 0.194167 |
First, lets take a sub sample of our Monte Carlo data.
# Reducing the amount of data
MC_EXT_VIS = MC_EXT.sample(int(len(MC_EXT)/10))
# Resetting the index
MC_EXT_VIS.reset_index(drop=True, inplace=True)
# Removing high energy (unphysical) monte carlo results
MC_EXL_VIS = MC_EXT_VIS.drop(MC_EXT_VIS[MC_EXT_VIS.trk_energy_tot > 2].index, inplace = True)
# Resetting the index again
MC_EXT_VIS.reset_index(drop=True, inplace=True)
# Displaying dataframe
print("Length of new data sample: {}".format(len(MC_EXT_VIS)))
MC_EXT_VIS.head(10)
Length of new data sample: 14350
| _closestNuCosmicDist | trk_len_v | trk_distance_v | category | topological_score | trk_sce_end_z_v | trk_sce_end_y_v | trk_sce_end_x_v | trk_score_v | trk_llr_pid_score_v | trk_sce_start_z_v | trk_sce_start_y_v | trk_sce_start_x_v | reco_nu_vtx_sce_x | reco_nu_vtx_sce_y | reco_nu_vtx_sce_z | trk_energy_tot | weight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 109.923335 | 9.753438 | 0.196104 | 21 | 0.141806 | 843.857666 | 87.584259 | 32.300518 | 0.35195 | -0.293709 | 837.839783 | 89.955887 | 39.561256 | 39.472691 | 90.067795 | 837.972717 | 0.643080 | 1.912804e-19 |
| 1 | 245.060142 | 207.029053 | 1.125573 | 21 | 0.985005 | 974.434998 | 72.799004 | 179.185837 | 1.0 | 0.972982 | 839.672913 | 104.606934 | 25.715662 | 24.796526 | 104.606293 | 839.032837 | 1.029499 | 1.101524e-05 |
| 2 | 96.834544 | 26.609798 | 0.303976 | 21 | 0.998161 | 65.371712 | -6.642156 | 176.555786 | 0.758531 | 0.510028 | 78.263779 | -20.144962 | 157.715164 | 157.763840 | -20.195173 | 77.966591 | 1.037104 | 1.589572e-01 |
| 3 | 90.340902 | 85.782829 | 0.148672 | 21 | 0.950899 | 544.78125 | -32.613686 | 38.961647 | 0.98762 | 0.928457 | 478.324921 | -16.678541 | 90.641563 | 90.668030 | -16.531004 | 478.366608 | 0.489859 | 1.589572e-01 |
| 4 | 69.439063 | 317.305054 | 0.463226 | 21 | 0.988410 | 915.950806 | -39.043678 | 94.696915 | 1.0 | 0.984279 | 626.002258 | 34.952488 | 198.198578 | 197.811203 | 35.003544 | 626.247437 | 0.986964 | 1.678104e-01 |
| 5 | 187.778475 | 14.696981 | 0.375383 | 21 | 0.998990 | 699.031799 | -95.819641 | 136.346176 | 0.258899 | 0.294872 | 692.731873 | -108.338181 | 132.643311 | 132.527649 | -108.038536 | 692.501038 | 0.906955 | 1.589572e-01 |
| 6 | 121.491287 | 47.562302 | 0.444433 | 21 | 0.388079 | 609.551758 | -38.90694 | 180.253662 | 0.911939 | 0.891623 | 571.660034 | -66.849861 | 183.185593 | 183.205399 | -66.624779 | 571.262939 | 0.901281 | 1.589572e-01 |
| 7 | 1.100449 | 330.549835 | 1.155137 | 21 | 0.870324 | 887.581787 | -102.226463 | 92.720581 | 1.0 | 0.98444 | 614.477051 | 81.978989 | 105.076294 | 105.025459 | 80.851982 | 614.483948 | 1.906634 | 1.589572e-01 |
| 8 | 87.516683 | 129.31514 | 2.041228 | 21 | 0.987471 | 141.814926 | -61.924423 | 104.858124 | 0.999971 | 0.949682 | 25.657482 | -44.632851 | 52.329998 | 51.542709 | -44.499443 | 23.778316 | 0.628923 | 1.589572e-01 |
| 9 | 58.040357 | 38.870113 | 0.361345 | 5 | 0.074545 | 528.516968 | -63.377823 | 138.959503 | 0.93541 | 0.724734 | 545.110962 | -83.800255 | 167.348328 | 167.080872 | -83.594582 | 544.929321 | 0.224669 | 1.589572e-01 |
Lets visualise some of the variables using seaborn.
variable_list = ['category', 'topological_score', 'trk_distance_v', 'trk_energy_tot', 'reco_nu_vtx_sce_x']
# List of categories in text
ptype = [r"Cosmic", r"Out Fid. Vol.", r"EXT", r"$\nu_e$ CC", r"$\nu_{\mu}$ CC", r"$\nu$ NC"]
# Plot data
fig = sns.pairplot(MC_EXT_VIS[variable_list], hue = 'category', palette = 'bright')
# Change location of legend
fig._legend.set_bbox_to_anchor((1.05, 0.5))
# Add Category number and type to legend
for t, l in zip(fig._legend.texts, ptype):
t.set_text(str(t.get_text()) + " - " + str(l))
### IGNORE, ONLY HERE FOR POSTERITY
##plt.figure(figsize=(10,5))
##labels=[r"$\nu$ NC", r"$\nu_{\mu}$ CC", r"$\nu_e$ CC", r"EXT", r"Out. fid. vol.", r"Cosmic"]
##sns.histplot(data=MC_EXT_VIS, x = 'topological_score', hue='category', multiple='stack', palette = 'deep')
##plt.legend(loc='upper left', labels=labels)
##plt.xlabel("topological score" ,fontsize=14)
##plt.xticks(fontsize=14)
##plt.ylabel("count" ,fontsize=14)
##plt.yticks(fontsize=14)
##plt.show()
MC_EXT_VIS_BACK = MC_EXT_VIS.copy(deep = True)
MC_EXT_VIS_BACK = MC_EXT_VIS_BACK.drop(MC_EXT_VIS_BACK[MC_EXT_VIS_BACK.category == 21].index)
# Plot
fig = sns.pairplot(MC_EXT_VIS_BACK[variable_list], hue = 'category', palette = 'bright')
# List of categories in text
ptype_no_mu = [r"Cosmic", r"Out Fid. Vol.", r"EXT", r"$\nu_e$ CC", r"$\nu$ NC"]
# Change location of legend
fig._legend.set_bbox_to_anchor((1.05, 0.5))
# Add Category number and type to legend
for t, l in zip(fig._legend.texts, ptype_no_mu):
t.set_text(str(t.get_text()) + " - " + str(l))
Here we shall modify the shape of our data to allow for its usage in a Decision Tree, then apply the RandomForest method.
'''
# Adjust data shape
features = ['_closestNuCosmicDist', 'trk_len_v', 'trk_distance_v', 'topological_score', 'trk_sce_end_z_v', 'trk_sce_end_y_v', 'trk_sce_end_x_v', 'trk_score_v', 'trk_llr_pid_score_v', 'trk_sce_start_z_v', 'trk_sce_start_y_v', 'trk_sce_start_x_v', 'reco_nu_vtx_sce_x', 'reco_nu_vtx_sce_y', 'reco_nu_vtx_sce_z', 'trk_energy_tot']
output = ['category']
# Setup new database, NEED MORE VALUES
MC_EXT_ML = MC_EXT.copy(deep = True)
MC_EXT_ML = MC_EXT.sample(int(len(MC_EXT)/10))
MC_EXT_ML = MC_EXT_ML.drop(MC_EXT_ML[MC_EXT_ML.category == 21].index)
MC_EXT_ML = MC_EXT_ML.drop(MC_EXT_ML[MC_EXT_ML.category == 10].index)
print(len(MC_EXT_ML))
# Setting variables
X = MC_EXT_ML[features]
y = np.array(MC_EXT_ML['category'])
# Displaying shape, should be (N, 16) (N) where is number of samples.
print(X.shape, y.shape)
# Then split the data up into a "training set" and "test set" using train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) # 70/30 training/test split
print(x_train.shape, x_test.shape)
'''
'\n# Adjust data shape\nfeatures = [\'_closestNuCosmicDist\', \'trk_len_v\', \'trk_distance_v\', \'topological_score\', \'trk_sce_end_z_v\', \'trk_sce_end_y_v\', \'trk_sce_end_x_v\', \'trk_score_v\', \'trk_llr_pid_score_v\', \'trk_sce_start_z_v\', \'trk_sce_start_y_v\', \'trk_sce_start_x_v\', \'reco_nu_vtx_sce_x\', \'reco_nu_vtx_sce_y\', \'reco_nu_vtx_sce_z\', \'trk_energy_tot\']\noutput = [\'category\']\n\n# Setup new database, NEED MORE VALUES\nMC_EXT_ML = MC_EXT.copy(deep = True)\nMC_EXT_ML = MC_EXT.sample(int(len(MC_EXT)/10))\nMC_EXT_ML = MC_EXT_ML.drop(MC_EXT_ML[MC_EXT_ML.category == 21].index)\nMC_EXT_ML = MC_EXT_ML.drop(MC_EXT_ML[MC_EXT_ML.category == 10].index)\nprint(len(MC_EXT_ML))\n\n# Setting variables\nX = MC_EXT_ML[features]\ny = np.array(MC_EXT_ML[\'category\'])\n\n# Displaying shape, should be (N, 16) (N) where is number of samples.\nprint(X.shape, y.shape)\n \n# Then split the data up into a "training set" and "test set" using train_test_split\nx_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) # 70/30 training/test split\n\nprint(x_train.shape, x_test.shape)\n'
Now we can train a random forest with this data. Lets make it rather simple.
'''
# Fit randomforest to our training data
rf = RandomForestClassifier(n_estimators=1000, criterion = 'gini', max_depth=8, random_state=1)
rf.fit(x_train, y_train)'''
"\n# Fit randomforest to our training data\nrf = RandomForestClassifier(n_estimators=1000, criterion = 'gini', max_depth=8, random_state=1)\nrf.fit(x_train, y_train)"
'''
# Test accuracy
y_pred = rf.predict(x_train)
print("Accuracy on training dataset:",metrics.accuracy_score(y_train, y_pred))
rf_acc_train = metrics.accuracy_score(y_train, y_pred)
y_pred = rf.predict(x_test)
print("Accuracy on testing dataset:",metrics.accuracy_score(y_test, y_pred))
rf_acc_test = metrics.accuracy_score(y_test, y_pred)
'''
'\n# Test accuracy\ny_pred = rf.predict(x_train)\nprint("Accuracy on training dataset:",metrics.accuracy_score(y_train, y_pred))\nrf_acc_train = metrics.accuracy_score(y_train, y_pred)\ny_pred = rf.predict(x_test)\nprint("Accuracy on testing dataset:",metrics.accuracy_score(y_test, y_pred))\nrf_acc_test = metrics.accuracy_score(y_test, y_pred)\n'
Now plot a confusion matrix to see how well it predicts your data, and where it gets confused.
'''
# Plot confusion matrix
ptype_no_mu_e = [r"Cosmic", r"Out Fid. Vol.", r"EXT", r"$\nu$ NC"]
cm = confusion_matrix(y_test, y_pred, normalize = 'true')
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels = ptype_no_mu_e)
disp.plot()
plt.show()
'''
'\n# Plot confusion matrix\n\nptype_no_mu_e = [r"Cosmic", r"Out Fid. Vol.", r"EXT", r"$\nu$ NC"]\ncm = confusion_matrix(y_test, y_pred, normalize = \'true\')\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels = ptype_no_mu_e)\ndisp.plot()\nplt.show()\n'
'''
# PLot importance
importance = rf.feature_importances_
ytix = features
plt.barh(range(16), importance)
plt.yticks(range(16), features)
plt.xlabel("Importance")
plt.ylabel("Features")
plt.show()
'''
'\n# PLot importance\nimportance = rf.feature_importances_\nytix = features\n\nplt.barh(range(16), importance)\nplt.yticks(range(16), features)\nplt.xlabel("Importance")\nplt.ylabel("Features")\nplt.show()\n'
Run on data
data_ML = data.copy(deep = True)
data_ML = data_ML.sample(int(len(MC_EXT)/10))
data_ML.head(10)
| _closestNuCosmicDist | trk_len_v | trk_distance_v | category | topological_score | trk_sce_end_z_v | trk_sce_end_y_v | trk_sce_end_x_v | trk_score_v | trk_llr_pid_score_v | trk_sce_start_z_v | trk_sce_start_y_v | trk_sce_start_x_v | reco_nu_vtx_sce_x | reco_nu_vtx_sce_y | reco_nu_vtx_sce_z | trk_energy_tot | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 65970 | 81.315774 | 139.783356 | 0.440632 | 0 | 0.970784 | 1016.274292 | -56.36414 | 132.530167 | 0.999956 | 0.957768 | 883.901672 | -60.443218 | 169.535751 | 169.701996 | -60.699181 | 884.240845 | 9999.746687 |
| 100112 | 124.186861 | 235.113602 | 0.596751 | 0 | 0.485653 | 863.339233 | -100.756332 | 0.835236 | 1.0 | 0.977007 | 665.785583 | -34.241573 | 108.00032 | 107.768768 | -34.398563 | 666.298157 | 9999.806846 |
| 271477 | 101.803889 | 37.77272 | 3.545382 | 0 | 0.952617 | 358.02124 | -89.884239 | 154.889755 | 0.025401 | 0.884544 | 367.89566 | -58.586166 | 159.406158 | 158.872513 | -54.948883 | 367.366364 | 9999.637370 |
| 76999 | 193.521457 | 133.843597 | 0.032026 | 0 | 0.005007 | 471.365845 | -0.930129 | 254.75943 | 0.999959 | 0.962098 | 421.885559 | 116.561867 | 214.71875 | 214.731232 | 116.539902 | 421.904724 | 9999.549466 |
| 62754 | 11.847562 | 16.097326 | 6.428771 | 0 | 0.732255 | 800.295776 | 104.33152 | 209.319336 | 0.85623 | 0.634969 | 801.913574 | 88.682693 | 209.815628 | 210.030106 | 90.487000 | 795.700500 | 9999.443841 |
| 201675 | 92.145049 | 41.841572 | 0.30188 | 0 | 0.984597 | 72.948624 | 91.038818 | 94.869553 | 0.946872 | 0.855153 | 45.943981 | 77.414948 | 123.647102 | 123.869118 | 77.434853 | 45.737244 | 1.032504 |
| 37851 | 67.493240 | 304.43277 | 0.230276 | 0 | 0.999038 | 295.326111 | 60.742306 | 198.009247 | 1.0 | 0.983262 | -0.321771 | 27.524208 | 138.472992 | 138.518814 | 27.574682 | -0.102834 | 9999.969875 |
| 38346 | 185.517637 | 10.318357 | 4.685859 | 0 | 0.424600 | 762.588135 | 72.340294 | 176.525589 | 0.699519 | 0.256626 | 755.397827 | 76.059059 | 170.284821 | 167.452484 | 77.310509 | 751.921265 | 9999.581756 |
| 363798 | 0.983821 | 132.843872 | 0.983821 | 0 | 0.533926 | 129.693451 | 81.754341 | 86.115463 | 0.999961 | 0.961818 | 0.291334 | 65.994331 | 64.377327 | 64.189568 | 65.815002 | -0.656876 | 9999.546772 |
| 226119 | 74.979788 | 17.399704 | 0.162136 | 0 | 0.087062 | 423.098694 | -25.459415 | 234.131958 | 0.290022 | 0.632918 | 419.052155 | -18.598545 | 248.417419 | 248.489395 | -18.520000 | 418.926270 | 9999.159759 |
'''
import tensorflow as tf
# turn off multithreading
from tensorflow.python.keras import backend as K
config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=config)
K.set_session(sess)
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input, Dense, Dropout
from tensorflow.python.keras.callbacks import EarlyStopping'''
'\nimport tensorflow as tf\n\n# turn off multithreading\nfrom tensorflow.python.keras import backend as K\nconfig = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)\nsess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=config)\nK.set_session(sess)\n\nfrom tensorflow.python.keras.models import Model\nfrom tensorflow.python.keras.layers import Input, Dense, Dropout\nfrom tensorflow.python.keras.callbacks import EarlyStopping'
'''
# import scaling stufff
from sklearn.preprocessing import StandardScaler
MC_EXT_ML_keras = MC_EXT.copy(deep = True)
MC_EXT_ML_keras = MC_EXT_ML_keras.drop(MC_EXT_ML_keras[MC_EXT_ML_keras.category == 21].index)
MC_EXT_ML_keras = MC_EXT_ML_keras.drop(MC_EXT_ML_keras[MC_EXT_ML_keras.category == 10].index)
# In here, set categories to 0,1,2,3
# 0 -> 4 (cosmic)
# 1 -> 5 (Out Fid. Vol.)
# 2 -> 7 (EXT)
# 3 -> 31 (mu CC)
MC_EXT_ML_keras.loc[MC_EXT_ML_keras['category'] == 4, 'category'] = 0
MC_EXT_ML_keras.loc[MC_EXT_ML_keras['category'] == 5, 'category'] = 1
MC_EXT_ML_keras.loc[MC_EXT_ML_keras['category'] == 7, 'category'] = 2
MC_EXT_ML_keras.loc[MC_EXT_ML_keras['category'] == 31, 'category'] = 3
MC_EXT_ML_keras.head(10)
'''
"\n# import scaling stufff\nfrom sklearn.preprocessing import StandardScaler\n\nMC_EXT_ML_keras = MC_EXT.copy(deep = True)\nMC_EXT_ML_keras = MC_EXT_ML_keras.drop(MC_EXT_ML_keras[MC_EXT_ML_keras.category == 21].index)\nMC_EXT_ML_keras = MC_EXT_ML_keras.drop(MC_EXT_ML_keras[MC_EXT_ML_keras.category == 10].index)\n\n# In here, set categories to 0,1,2,3\n# 0 -> 4 (cosmic)\n# 1 -> 5 (Out Fid. Vol.)\n# 2 -> 7 (EXT)\n# 3 -> 31 (mu CC)\nMC_EXT_ML_keras.loc[MC_EXT_ML_keras['category'] == 4, 'category'] = 0\nMC_EXT_ML_keras.loc[MC_EXT_ML_keras['category'] == 5, 'category'] = 1\nMC_EXT_ML_keras.loc[MC_EXT_ML_keras['category'] == 7, 'category'] = 2\nMC_EXT_ML_keras.loc[MC_EXT_ML_keras['category'] == 31, 'category'] = 3\n\nMC_EXT_ML_keras.head(10)\n"
'''
scaler = StandardScaler()
scaled_data = scaler.fit_transform(MC_EXT_ML_keras[features])
y = np.array(MC_EXT_ML_keras['category'])
# scaled, but same shape
print(scaled_data)
print(MC_EXT_ML_keras[features].shape, scaled_data.shape)
# now split for usage
xs_train, xs_test, ys_train, ys_test = train_test_split(scaled_data, y, test_size = 0.3, random_state = 1)'''
"\nscaler = StandardScaler()\nscaled_data = scaler.fit_transform(MC_EXT_ML_keras[features])\n\ny = np.array(MC_EXT_ML_keras['category'])\n\n\n# scaled, but same shape\nprint(scaled_data)\nprint(MC_EXT_ML_keras[features].shape, scaled_data.shape)\n\n# now split for usage\nxs_train, xs_test, ys_train, ys_test = train_test_split(scaled_data, y, test_size = 0.3, random_state = 1)"
'''
# Create Keras Model
inpt = Input(shape=(xs_train.shape[1]))
layer0 = Dense(128, activation='relu')(inpt)
drop1 = Dropout(.2)(layer0)
layer1 = Dense(64, activation='relu')(drop1)
drop2 = Dropout(.2)(layer1)
layer2 = Dense(32, activation='relu')(drop2)
layer3 = Dense(16, activation='relu')(layer2)
output = Dense(4, activation='softmax')(layer3)
kf = Model(inputs=inpt, outputs=output, name='kerasNN')'''
"\n# Create Keras Model\ninpt = Input(shape=(xs_train.shape[1]))\nlayer0 = Dense(128, activation='relu')(inpt)\ndrop1 = Dropout(.2)(layer0)\nlayer1 = Dense(64, activation='relu')(drop1)\ndrop2 = Dropout(.2)(layer1)\nlayer2 = Dense(32, activation='relu')(drop2)\nlayer3 = Dense(16, activation='relu')(layer2)\noutput = Dense(4, activation='softmax')(layer3)\n\nkf = Model(inputs=inpt, outputs=output, name='kerasNN')"
'''# Compile using Adam optimiser
kf.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Get summary of NN
kf.summary()'''
"# Compile using Adam optimiser\nkf.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n\n# Get summary of NN\nkf.summary()"
'''# callback to stop it running forever
callbacks = EarlyStopping(monitor = 'val_loss', patience = 4, min_delta = 0.001, verbose = 1)
# train network
kff = kf.fit(xs_train, ys_train, epochs = 25, batch_size = 32, validation_data = (xs_test, ys_test), callbacks = callbacks)'''
"# callback to stop it running forever\ncallbacks = EarlyStopping(monitor = 'val_loss', patience = 4, min_delta = 0.001, verbose = 1)\n# train network\nkff = kf.fit(xs_train, ys_train, epochs = 25, batch_size = 32, validation_data = (xs_test, ys_test), callbacks = callbacks)"
'''
# evaluate
print("KERAS ACCURACY AND LOSS \n")
print("Test data accuracy and loss")
stest_res = kf.evaluate(xs_test, ys_test)
print("\n")
print("Training data accuracy and loss")
strain_res = kf.evaluate(xs_train, ys_train)
# SKLEARN ACCURACY AND LOSS
print("\n SK LEARN ACCURACY AND LOSS \n")
print("Accuracy on training dataset:",rf_acc_train)
print("Accuracy on testing dataset:",rf_acc_test)'''
'\n# evaluate\nprint("KERAS ACCURACY AND LOSS \n")\n\nprint("Test data accuracy and loss")\nstest_res = kf.evaluate(xs_test, ys_test)\nprint("\n")\n\nprint("Training data accuracy and loss")\nstrain_res = kf.evaluate(xs_train, ys_train)\n\n# SKLEARN ACCURACY AND LOSS\nprint("\n SK LEARN ACCURACY AND LOSS \n")\nprint("Accuracy on training dataset:",rf_acc_train)\nprint("Accuracy on testing dataset:",rf_acc_test)'
'''# create confusion matrix
test_pred = kf.predict(xs_test)
y_pred1 = np.argmax(test_pred, axis=1)'''
'# create confusion matrix\ntest_pred = kf.predict(xs_test)\ny_pred1 = np.argmax(test_pred, axis=1)'
'''matrix = confusion_matrix(ys_test, y_pred1, normalize = 'true')
df_cm = pd.DataFrame(matrix, index = ptype_no_mu_e, columns = ptype_no_mu_e)
plt.figure(figsize = (10,8))
sns.heatmap(df_cm, annot=True)
plt.xlabel('Predicted Particle ID', fontsize = 15)
plt.ylabel('True Particle ID', fontsize = 15)
plt.show()
'''
"matrix = confusion_matrix(ys_test, y_pred1, normalize = 'true')\ndf_cm = pd.DataFrame(matrix, index = ptype_no_mu_e, columns = ptype_no_mu_e)\nplt.figure(figsize = (10,8))\nsns.heatmap(df_cm, annot=True)\nplt.xlabel('Predicted Particle ID', fontsize = 15)\nplt.ylabel('True Particle ID', fontsize = 15)\nplt.show()\n"
You can access this function. It is present in Neutrino_functions.py. You can create your own plotting function if you wish.
'''
#This command shows what input you should give the plotting function. The inputs with =None can be left out when calling the function
help(Neutrino_functions.histogram_plot)'''
'\n#This command shows what input you should give the plotting function. The inputs with =None can be left out when calling the function\nhelp(Neutrino_functions.histogram_plot)'
'''mc_dump, data_dump = Neutrino_functions.histogram_plot(MC_EXT_ML, 'topological_score', 50, 'saved_figure_name', MC_EXT_ML['weight'],xlims=[0,1], plot_data = False, logscale = False, dataFrame = data)
'''
"mc_dump, data_dump = Neutrino_functions.histogram_plot(MC_EXT_ML, 'topological_score', 50, 'saved_figure_name', MC_EXT_ML['weight'],xlims=[0,1], plot_data = False, logscale = False, dataFrame = data)\n"
Modify selection cuts. Remember to cut the same variables in both data sets.
BIN = 100
# Common variables in both dataframes
all_variables_to_plot = list(set(list(MC_EXT)).intersection(list(data)))
for item in all_variables_to_plot:
plt.figure(figsize=(20,15))
i = sns.histplot(data=MC_EXT, x=item, multiple="stack", hue="category", palette = 'deep', weights = MC_EXT['weight'], bins = BIN, legend = False)
i.set(xlabel=item, ylabel = "Events")
#plt.yscale('log')
plt.xlim([np.min(MC_EXT[item]), np.max(MC_EXT[item])])
plt.legend(title='Run 3',fontsize=16, loc='upper right', labels=[r"$\nu$ NC", r"$\nu_{\mu}$ CC", r"$\nu_e$ CC", r"EXT", r"Out. fid. vol.", r"Cosmic"])
plt.show(i)
def Selections(frame):
# Basic variables present in dataframe
trk_start_x_v = frame['trk_sce_start_x_v']
trk_start_y_v = frame['trk_sce_start_y_v']
trk_start_z_v = frame['trk_sce_start_z_v']
trk_end_x_v = frame['trk_sce_end_x_v']
trk_end_y_v = frame['trk_sce_end_y_v']
trk_end_z_v = frame['trk_sce_end_z_v']
reco_x = frame['reco_nu_vtx_sce_x']
reco_y = frame['reco_nu_vtx_sce_y']
reco_z = frame['reco_nu_vtx_sce_z']
topological = frame['topological_score']
trk_score_v = frame['trk_score_v']
trk_dis_v = frame['trk_distance_v']
trk_len_v = frame['trk_len_v']
trk_energy_tot = frame['trk_energy_tot']
# adding in nucosmic distance
cosmic_distance = frame['_closestNuCosmicDist']
cat = frame['category']
# List of [signal entries , purity , label]. Can be appended each selection cut
#event = []
#event.append([len(frame[cat==21]['category']),len(frame[cat==21]['category'])/len(frame['category']),'basic'])
# select the conditions you want to apply
# The example here removes tracks of lengths that are greater than 1000 cm
selection = trk_energy_tot < 2
selection = selection & ((trk_len_v > 200) & (trk_len_v < 1000))
# limit the fiducial volume (only need to apply this once, maybe use hard limits on one of the spatial coordinates)
#selection = selection & (trk_start_x_v < np.max(trk_start_x_v)*0.99) & (trk_start_x_v > np.max(trk_start_x_v)*0.01)
#selection = selection & (trk_start_y_v < np.max(trk_start_y_v)*0.99) & (trk_start_y_v > np.max(trk_start_y_v)*0.01)
#selection = selection & (trk_start_z_v < np.max(trk_start_z_v)*0.99) & (trk_start_z_v > np.max(trk_start_z_v)*0.01)
# one more cut as the above one is imperfect
###selection = selection & (trk_end_x_v < 250) & (trk_end_x_v > 10)
###selection = selection & (trk_start_x_v < 250) & (trk_start_x_v > 10)
# remove the massive energy spike thats unphysical
#selection = selection & (trk_energy_tot < 2)
# remove the 0 component from nu cosmic distance
selection = selection & (cosmic_distance > 10)
# take out topological score below a certain value (0.4)
selection = selection & (topological > 0.4)
# remove trk_len_v values below 200
#selection = selection & (trk_len_v > 200)
### OLES SELECTION CRITERIA
#selection = selection & (trk_end_z_v > 10) & (trk_start_x_v > 20)&(trk_start_x_v<240) & (trk_start_y_v > -100)&(trk_start_y_v<100) & (cosmic_distance > 20) & (trk_len_v > 30) & (trk_len_v < 1000) & (trk_end_y_v > -110) & (trk_end_y_v < 100)
# separate frame from frame_selection to allow for efficiency calculations
frame_selection = frame[selection]
# quick check to see if the table has any truth data (as this code is run on the real data as well as MC data)
# bit of a janky solution currently but it'll do
if (len(frame[cat==21]['category']) != 0):
# before applying, calculate efficiency and purity in here (its easier!)
efficiency = len(frame_selection[cat==21]['category'])/len(frame[cat==21]['category'])
purity = len(frame_selection[cat==21]['category'])/len(frame_selection)
print("WARNING: Cutting efficiency will only be accurate across first application, after that point it will be set to 1.00.\nCutting Efficiency: {:.2f}\nCutting Purity: {:.2f}".format(efficiency, purity))
# Apply selection on dataframe
frame = frame_selection
return frame
MC_EXT = Selections(MC_EXT)
data_frame = Selections(data)
# be careful here, as you're trimming a trimmed dataset if you adjust the selections after running this file!
C:\Users\x58333ml\AppData\Local\Temp\ipykernel_6710872\2264663337.py:66: UserWarning: Boolean Series key will be reindexed to match DataFrame index. efficiency = len(frame_selection[cat==21]['category'])/len(frame[cat==21]['category']) C:\Users\x58333ml\AppData\Local\Temp\ipykernel_6710872\2264663337.py:67: UserWarning: Boolean Series key will be reindexed to match DataFrame index. purity = len(frame_selection[cat==21]['category'])/len(frame_selection)
WARNING: Cutting efficiency will only be accurate across first application, after that point it will be set to 1.00. Cutting Efficiency: 0.06 Cutting Purity: 0.95
BIN = 100
# Common variables in both dataframes
all_variables_to_plot = list(set(list(MC_EXT)).intersection(list(data_frame)))
for item in all_variables_to_plot:
plt.figure(figsize=(20,15))
i = sns.histplot(data=MC_EXT, x=item, multiple="stack", hue="category", palette = 'deep', weights = MC_EXT['weight'], bins = BIN, legend = False)
i.set(xlabel=item, ylabel = "Events")
#plt.yscale('log')
plt.xlim([np.min(MC_EXT[item]), np.max(MC_EXT[item])])
plt.legend(title='Run 3',fontsize=16, loc='upper right', labels=[r"$\nu$ NC", r"$\nu_{\mu}$ CC", r"$\nu_e$ CC", r"EXT", r"Out. fid. vol.", r"Cosmic"])
plt.show(i)
It is recommended to plot purity and efficiency after each variable cut.
HINT: Function Selection() has commented lines of code that you may find useful for purpose of plotting changes in purity/efficiency after every cut.
To do so would require the initial data to always be saved as a comparison. Which isn't a bad idea but is space inefficient. So currently the answer to exercise 8 is in the selection function of exercise 7.
# List of [signal entries , purity , label]. Can be appended each selection cut
#event = []
#event.append([len(frame[cat==21]['category']),len(frame[cat==21]['category'])/len(frame['category']),'basic'])
# efficiency currently put inside the cutting function. MOVE IT OUT
Final representation of MC and data after applying the cuts
mc_dump, data_dump = Neutrino_functions.histogram_plot(MC_EXT, 'trk_energy_tot', 23, 'saved_figure_name', MC_EXT['weight'],xlims=[np.min(MC_EXT['trk_energy_tot']), np.max(MC_EXT['trk_energy_tot'])], plot_data = True, logscale = False, dataFrame = data_frame)
# mc_dump and data_dump give you the heights of the histograms
#print(MC_EXT)
print(MC_EXT['category'].unique())
print(MC_EXT['category'].value_counts())
[21 5 4 10 31 7] 21 14497 5 369 4 129 7 116 31 70 10 13 Name: category, dtype: int64
You want to look at section 4.5 for all of this. Also you're wanting to be careful with the binning here! Not discussed at all in lab book
Write oscillation function and chi square. Apply oscillation function on MC data set and minimize chi square
# now we can get the chi_squared uncertainty here
def chi_squared_pearson(MC, data):
'''
Takes arrays/lists of MC entries and data entries and finds the chi squared values for each component.
MC: Monte_Carlo entries (per bin)
data: Truth data entries (per bin)
'''
numer = np.square(MC - data)
denom = (0.15*MC)**2
div = np.divide(numer, denom)
return np.sum(div)
def chi(theta, mass, MC, data_height):
MC_height, MC_energy = MC
oscillated_height = MC_height * (1 - theta * (np.sin(1.27 * mass * 0.47 / MC_energy))^2)
numer = np.square(oscillated_height - data_height)
denom = np.square(0.15 * oscillated_height)
div = np.divide(numer, denom)
return np.sum(div)
def idk(a, b):
c = a+b
d = a-b
e = a*b
f = a/b
return c, d, e, f
idk(1,2)
g, h, i, j = idk(1,2)
print(g, h, i, j)
3 -1 2 0.5
def oscillation_func(sin2thetasq, mass, E):
"""
Variables needed to get probability out:
mixing angle as sin squared 2 theta
mass splitting (mass)
Length travelled (L) ~ 0.47km
Energy (E) taken from reconstructed v_mu energy GeV (list)
Pulled from:
https://warwick.ac.uk/fac/sci/physics/staff/academic/boyd/stuff/neutrinolectures/lec_oscillations.pdf
Page 3
"""
L = 0.5 #km
### mass, is it squared already? I assume so
# dividing one value by array of energies
inside = 1.27 * mass * np.divide(L,E)
sin_val = np.square(np.sin(inside))
return (sin2thetasq * sin_val)
# So, minimise using best fit functions
def fit_func(data, theta, mass):
entries, energies = data
return entries * (1-oscillation_func(theta, mass, energies))
# need to collect correct bin energies
def energy_vals(MC, bins):
"""
:MC: Monte Carlo data (pass in trk_energy_tot, category and weight)
:bins: number of bins
:bin_centers: centre of each bin across the range of energies, useful for "energy values" of each bin
:bin_no: numpy array with list matching the MC data corresponding to which bin each data value would fall into
"""
# if data has been cleaned, this will collect edge of bins
bin_edges = np.linspace(np.min(MC['trk_energy_tot']), np.max(MC['trk_energy_tot']),bins+1,endpoint = True)
# take bin centres
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
# will leave digitise component for later
return bin_centers
This is all just testing to ensure that the functions work as expected
bin_centers = energy_vals(MC_EXT, 23)
popt, pcov = scipy.optimize.fmin(chi, (theta, mass), args=(mc_dump, bin_centers, data_dump))
print(popt, pcov)
[1.52393679e-12 1.55030166e-12] [[0. 0.] [0. 0.]]
#print(1-oscillation_func(popt[0], popt[1], bin_centers))
print(mc_dump, data_dump)
[48.29789877278941, 101.50546856460139, 163.96599109562678, 208.47337031716475, 248.30961941738056, 262.7017315919509, 265.9729799913259, 225.26715397199746, 220.62464330790453, 194.24473160329987, 167.19820346018673, 153.0560425907467, 122.08138597211715, 93.45111367512935, 97.19663098760088, 67.70541814542858, 56.289020196423444, 51.936164350031994, 42.43714673815587, 37.7579655388356, 32.78639025876319, 25.488405868451718, 19.88193639836626] [75, 130, 207, 208, 276, 255, 272, 262, 217, 199, 159, 146, 106, 91, 91, 75, 55, 46, 52, 28, 35, 27, 10]
# lets just try scaling all the data rather than just muon neutrinos. also makes the code easier
def chi_squared_mapping(MC_heights, data_heights, theta_list, mass_list, bin_centers):
A = len(theta_list)
B = len(mass_list)
chi_square_map = np.zeros((A, B))
# create double look for theta_list and mass_list
for i in range(A):
for j in range(B):
# calculate the probability of oscillation for the bin_centers
p_vals = (1-oscillation_func(theta_list[i], mass_list[j], bin_centers))
# apply these probability values to the heights of our MC data
MC_heights_new = p_vals*MC_heights
# check if MC_heights has zeroes in it
if (0 in MC_heights_new):
print("Zero detected in theta: {}, mass: {}".format(theta_list[j],mass_list[i]))
####print(theta_list[i], mass_list[j])
# then save the chi_squared_values for the new MC_heights
### SEE, NOW I AND THEN J...WHY?
chi_square_map[i,j] = chi_squared_pearson(MC_heights_new, data_heights)
#if chi_squared_pearson(MC_heights_new, data_heights) < 224.40294:
#print(theta_list[i], mass_list[j])
###print("passed")
return chi_square_map
# now lets try mapping the chi_square for two linspaces
increment = 100
theta_vals = np.linspace(0.000001, 0.5, num = increment) # note that I dont go to 1, because that breaks plot. choose range carefully!
mass_vals = np.linspace(0.00001, 100, num = increment)
#mass_vals = np.asarray([])
#n = 0
#while n < 100:
# mass_vals = np.append(mass_vals, 91.919)
# n+=1
mass_vals2 = np.logspace(1.98, 2, num = 10000)
theta = 0.040
mc_dump = np.asarray(mc_dump)
chi_squared = np.zeros(len(mass_vals2))
for value in range(len(mass_vals2)):
# probability of oscillation
p_value = 1 - (theta * (np.sin(1.27 * 0.47 * (mass_vals2[value]) / bin_centers))**2)
#print(p_value)
#print(mc_dump)
# oscillated nb of events
MC_heights_new = p_value * mc_dump
# chi squared
chi_squared[value] = chi_squared_pearson(MC_heights_new, data_dump)
plt.plot(mass_vals2, chi_squared, label=r'χ$^2$')
line = plt.hlines(40.373+1, xmin=np.min(mass_vals2), xmax=np.max(mass_vals2), color='r', label=r'χ$_{min}$$^2$ + 1')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Squared mass difference')
plt.ylabel('Chi squared')
plt.tight_layout()
plt.legend()
plt.savefig('Mass scan plot.png', dpi=300)
plt.show()
#print(chi_squared)
print(np.min(chi_squared))
print(np.where(chi_squared==np.min(chi_squared)))
print(mass_vals2[5413])
print(np.where(np.round(chi_squared, decimals=3)==np.round(np.min(chi_squared), decimals=3)+1))
40.373329636571256 (array([5413], dtype=int64),) 97.91000724601668 (array([4338, 6501], dtype=int64),)
97.980-98.402
-0.42199999999999704
97.980-97.426
0.554000000000002
(0.42199999999999704+0.554000000000002)/2
0.48799999999999955
10**0.422
2.6424087573219466
theta_valst = np.logspace(-3, -1, 10000)
mass = 91.919
mc_dump = np.asarray(mc_dump)
chi_squared = np.zeros(len(theta_valst))
for value in range(len(theta_valst)):
# probability of oscillation
p_value = 1 - (theta_valst[value] * (np.sin(1.27 * 0.47 * mass / bin_centers))**2)
#print(p_value)
#print(mc_dump)
# oscillated nb of events
MC_heights_new = p_value * mc_dump
# chi squared
chi_squared[value] = chi_squared_pearson(MC_heights_new, data_dump)
plt.plot(theta_valst, chi_squared)
plt.xscale('log')
plt.yscale('log')
plt.show()
#print(chi_squared)
#print(np.min(chi_squared))
#print(np.where(chi_squared==np.min(chi_squared)))
#print(np.where(chi_squared==np.min(chi_squared)+1))
#print(mass_vals[97])
#data = (mc_dump, bin_centers)
#popt, pcov = scipy.optimize.fmin(chi, (theta_vals, mass_vals), args=(data, data_dump))
#print(popt, pcov)
chi_square_map = chi_squared_mapping(mc_dump, data_dump, theta_vals, mass_vals, bin_centers)
print(np.min(chi_square_map))
40.44190180786447
chi_square_div = np.divide(chi_square_map, 21)
print(chi_square_div)
[[ 1.94524731 1.94525225 1.94525301 ... 1.94524975 1.94525352 1.94525085] [ 1.94524731 1.97052712 1.97496031 ... 1.95811352 1.97746846 1.96378172] [ 1.94524731 1.99645512 2.00647278 ... 1.97203725 2.01141428 1.98361209] ... [ 1.94524731 10.17097468 27.75309257 ... 17.46452456 27.8813279 20.73624964] [ 1.94524731 10.36825304 28.61191201 ... 18.00604364 28.75120729 21.38074434] [ 1.94524731 10.5697248 29.49911626 ... 18.56624791 29.65031288 22.04733344]]
plt.figure(figsize = (9,6))
x_con, y_con = np.meshgrid(theta_vals, mass_vals, indexing = 'ij')
z_con = chi_square_div*21
cont = plt.contourf(x_con, y_con, z_con, 20, cmap='RdGy')
plt.colorbar(cont)
plt.title("pearson-chisquared for sin2thetasq against mass splitting")
plt.xlabel("sin2thetasq")
plt.ylabel("mass splitting (eV^2)")
plt.savefig("contour_map_new.png")
plt.yscale('log')
plt.xscale('log')
plt.show()
# finding point of lowest chi squared on the plot, and the values at which it occurs
print((chi_square_div*21).shape)
print(np.where(chi_square_div*21==np.min(chi_square_div*21)))
print(chi_square_div[12][41])
# so plot this low point
dof = 21 # Degrees of freedom
zmin = np.min(chi_square_div)
print(zmin*dof)
minchi = zmin*dof
mask = np.array(chi_square_div) == zmin
print(mask[12][41])
# So this should be the lowest value
print(theta_vals[8], mass_vals[91])
best_parameters = (theta_vals[8], mass_vals[91]) # Best fit parameters corresponding to minimum chi squared
(100, 100) (array([8], dtype=int64), array([91], dtype=int64)) 2.176937647679286 40.44190180786447 False 0.0404049595959596 91.91919272727272
# use meshgrid to assure correct formatting
xx, yy = np.meshgrid(theta_vals, mass_vals, indexing='ij')
#print(xx)
#print(yy)
# trying aleksander mixing angle component
#mixing_angle_red = [(1-np.sqrt(1-a))*(1-np.sqrt(1-0.36)) for a in theta_vals]
#xr, yr = np.meshgrid(mixing_angle_red, mass_vals, indexing = 'ij')
# Go plot the CL things. Plot 90%, 95%, 99%
# 90% rDOF = 19.768, 95% rDOF = 17.708, 99% rDOF = 14.257
plt.figure(figsize = (9,6))
colors = ['red', 'green', 'blue']
#8.897
CS = plt.contour(xx, yy, chi_square_div*21, [4.605+minchi, 5.991+minchi, 9.210+minchi], colors = colors)
#plt.clabel(CS, fontsize=9, inline=1)
# plot minimum point
plt.scatter(best_parameters[0], best_parameters[1])
plt.annotate('Minimum value of chi_squared', xy=(best_parameters[0], best_parameters[1]), xytext=(0.0015,50),arrowprops=dict(facecolor='black', shrink=0.05))
plt.title("CL plot for pearson-chisquared for sin2thetasq against mass splitting")
plt.xlabel("sin2thetasq")
plt.ylabel("mass splitting (eV^2)")
plt.xscale('log')
plt.yscale('log')
# legend production
labels = ['90%', '95%', '99%']
#for i in range(len(labels)):
# CS.collections[i].set_label(labels[i])
h = CS.collections
l = [f'{a:.1f}'for a in CS.levels]
l = [l[i] + (" : " + str(labels[i])) for i in range(len(CS.levels))]
proxy = [plt.Rectangle((0,0),1,1,color = colors[i]) for i in range(len(colors))]
plt.legend(proxy,l, loc='lower left')
#plt.legend(loc='lower left')
plt.savefig("CL_map.png")
plt.show()
make new probability oscillation functions for the 3+1 model, apply them to get a result
#ue_theta_vals = 0.24*theta_vals
#cal1 = 1/(2*(1-0.24))*0.24
#cal2 = (1 - np.sqrt(1-theta_vals))
#ue_theta_vals = cal1*cal2
ue_theta_vals = [(1-np.sqrt(1-a))*(1-np.sqrt(1-0.24)) for a in theta_vals]
xxx, yyy = np.meshgrid(ue_theta_vals, mass_vals, indexing='ij')
Results are extracted from MiniBooNE (orange) and LSND (blue)
import matplotlib.patches as mpatches
from matplotlib.legend_handler import HandlerPatch
# Load data
LSND_data = pd.read_csv('DataSet_LSND.csv').to_numpy()
MiniBooNE_data = pd.read_csv('DataSet_MiniBooNE.csv').to_numpy()
plt.figure(figsize=(9,6))
# Plot data
plt.plot(LSND_data[:,0],LSND_data[:,1],'o', zorder=1)
plt.plot(MiniBooNE_data[:,0],MiniBooNE_data[:,1],'o', zorder=2)
# 0.24 multiple
#plt.annotate('Minimum value of chi_squared', xy=(0.0212*0.24, 22.23), xytext=(0.0015,50),arrowprops=dict(facecolor='black', shrink=0.05))
# 99% CL, 90% CL
colors = ['blue', 'red', 'k']
CS = plt.contour(xxx, yyy, chi_square_div*dof, [4.605+minchi, 9.210+minchi], colors = colors, zorder=3)
#CS = plt.contour(xxx, yyy, chi_square_div*21, [29.615+minchi,38.932+minchi], colors = colors, zorder=3)
labels = ['90%', '99%']
plt.scatter((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)), best_parameters[1], color = 'orange', s=15, zorder=4, label='minchi')
#plt.annotate('Minimum value of chi_squared', xy=((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)), best_parameters[1]), xytext=(0.0015,50),arrowprops=dict(facecolor='black', shrink=0.05))
#(r'Minimum $\Chi^2$')
class HandlerEllipse(HandlerPatch):
def create_artists(self, legend, orig_handle,
xdescent, ydescent, width, height, fontsize, trans):
center = 0.5 * width - 0.5 * xdescent, 0.5 * height - 0.5 * ydescent
p = mpatches.Ellipse(xy=center, width=height + xdescent,
height=height + ydescent)
self.update_prop(p, orig_handle, legend)
p.set_transform(trans)
return [p]
h = CS.collections
l = [f'{a:.1f}'for a in CS.levels]
l = [l[i] + (" : " + str(labels[i])) for i in range(len(CS.levels))]
l.append(r'Minimum χ$^2$')
proxy = [plt.Rectangle((0,0),1,1,color = colors[i]) for i in range(len(colors)-1)]
proxy.append(mpatches.Circle((0.5, 0.5), radius = 0.25, facecolor='orange', edgecolor="orange"))
plt.legend(proxy, l, handler_map={mpatches.Circle: HandlerEllipse()}).get_frame().set_facecolor('#FFFFFF')
plt.xlabel(r'$sin^2(2\theta_{\mu e})=sin^2(\theta_{24})sin^2(2\theta_{14})$',fontsize=20)
plt.ylabel(r'$\Delta$ $m_{14}^2$',fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.xlim([10**(-4), 0.1])
plt.ylim([10**(-3), 10**2])
plt.yscale('log')
plt.xscale('log')
plt.tight_layout()
plt.savefig('Final contour plot!.png', dpi=300)
plt.show()
# check sigma values around the minimum value of chi squared?
#import matplotlib
#for cname, hex in matplotlib.colors.cnames.items():
# print(cname,hex)
# 95 % confidence level
import matplotlib.patches as mpatches
from matplotlib.legend_handler import HandlerPatch
# Load data
LSND_data = pd.read_csv('DataSet_LSND.csv').to_numpy()
MiniBooNE_data = pd.read_csv('DataSet_MiniBooNE.csv').to_numpy()
plt.figure(figsize=(9,6))
# Plot data
plt.plot(LSND_data[:,0],LSND_data[:,1],'o', zorder=1)
plt.plot(MiniBooNE_data[:,0],MiniBooNE_data[:,1],'o', zorder=2)
# 0.24 multiple
#plt.annotate('Minimum value of chi_squared', xy=(0.0212*0.24, 22.23), xytext=(0.0015,50),arrowprops=dict(facecolor='black', shrink=0.05))
#calculating the furthest point from the min chi squared
colors = ['purple', 'orange']
CS = plt.contour(xxx, yyy, chi_square_div*dof, [5.991+minchi], colors = colors, zorder=3)
ui = CS.allsegs[0]
aangle = ui[0][:, 0]
mmass = ui[0][:, 1]
max_angle = np.amax(aangle)
min_angle = np.amin(aangle)
max_mass = np.amax(mmass)
min_mass = np.amin(mmass)
#calculating uncertainties
angle_unc = (max_angle-min_angle)/2
mass_unc = (max_mass-min_mass)/2
print(angle_unc, mass_unc)
#CS = plt.contour(xxx, yyy, chi_square_div*21, [29.615+minchi,38.932+minchi], colors = colors, zorder=3)
labels = ['95%']
plt.scatter((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)), best_parameters[1], color = 'orange', s=15, zorder=4, label='minchi')
#plt.errorbar((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)), best_parameters[1], yerr=mass_unc , xerr=angle_unc, fmt='o', color = '#FFA500', zorder=4, label='minchi')
#plt.annotate('Minimum value of chi_squared', xy=((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)), best_parameters[1]), xytext=(0.0015,50),arrowprops=dict(facecolor='black', shrink=0.05))
print((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)))
#(r'Minimum $\Chi^2$')
class HandlerEllipse(HandlerPatch):
def create_artists(self, legend, orig_handle,
xdescent, ydescent, width, height, fontsize, trans):
center = 0.5 * width - 0.5 * xdescent, 0.5 * height - 0.5 * ydescent
p = mpatches.Ellipse(xy=center, width=height + xdescent,
height=height + ydescent)
self.update_prop(p, orig_handle, legend)
p.set_transform(trans)
return [p]
h = CS.collections
l = [f'{a:.1f}'for a in CS.levels]
l = [l[i] + (" : " + str(labels[i])) for i in range(len(CS.levels))]
l.append(r'Minimum χ$^2$')
proxy = [plt.Rectangle((0,0),1,1,color = colors[i]) for i in range(len(colors)-1)]
proxy.append(mpatches.Circle((0.5, 0.5), radius = 0.25, facecolor='#FFA500', edgecolor="#FFA500"))
plt.legend(proxy, l, handler_map={mpatches.Circle: HandlerEllipse()}).get_frame().set_facecolor('#FFFFFF')
plt.xlabel(r'$sin^2(2\theta_{\mu e})=sin^2(\theta_{24})sin^2(2\theta_{14})$',fontsize=20)
plt.ylabel(r'$\Delta$ $m_{14}^2$',fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.xlim([10**(-4), 0.1])
plt.ylim([10**(-3), 10**2])
plt.yscale('log')
plt.xscale('log')
plt.tight_layout()
plt.savefig('Final contour plot 95%!.png', dpi=300)
plt.show()
0.017674976645695065 49.984427186565 0.002617074493875873
# 68 % confidence level
import matplotlib.patches as mpatches
from matplotlib.legend_handler import HandlerPatch
# Load data
LSND_data = pd.read_csv('DataSet_LSND.csv').to_numpy()
MiniBooNE_data = pd.read_csv('DataSet_MiniBooNE.csv').to_numpy()
plt.figure(figsize=(9,6))
# Plot data
plt.plot(LSND_data[:,0],LSND_data[:,1],'o', zorder=1)
plt.plot(MiniBooNE_data[:,0],MiniBooNE_data[:,1],'o', zorder=2)
# 0.24 multiple
#plt.annotate('Minimum value of chi_squared', xy=(0.0212*0.24, 22.23), xytext=(0.0015,50),arrowprops=dict(facecolor='black', shrink=0.05))
#calculating the furthest point from the min chi squared
colors = ['purple', 'orange']
CS = plt.contour(xxx, yyy, chi_square_div*dof, [2.27887+minchi], colors = colors, zorder=3)
ui = CS.allsegs[0]
aangle = ui[0][:, 0]
mmass = ui[0][:, 1]
max_angle = np.amax(aangle)
min_angle = np.amin(aangle)
max_mass = np.amax(mmass)
min_mass = np.amin(mmass)
#calculating uncertainties
angle_unc = (max_angle-min_angle)/2
mass_unc = (max_mass-min_mass)/2
print(angle_unc, mass_unc)
#CS = plt.contour(xxx, yyy, chi_square_div*21, [29.615+minchi,38.932+minchi], colors = colors, zorder=3)
labels = ['68%']
#plt.scatter((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)), best_parameters[1], color = 'orange', s=15, zorder=4, label='minchi')
plt.errorbar((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)), best_parameters[1], yerr=mass_unc , xerr=angle_unc, fmt='o', color = '#FFA500', zorder=4, label='minchi')
#plt.annotate('Minimum value of chi_squared', xy=((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)), best_parameters[1]), xytext=(0.0015,50),arrowprops=dict(facecolor='black', shrink=0.05))
print((1-np.sqrt(1-best_parameters[0]))*(1-np.sqrt(1-0.24)))
#(r'Minimum $\Chi^2$')
class HandlerEllipse(HandlerPatch):
def create_artists(self, legend, orig_handle,
xdescent, ydescent, width, height, fontsize, trans):
center = 0.5 * width - 0.5 * xdescent, 0.5 * height - 0.5 * ydescent
p = mpatches.Ellipse(xy=center, width=height + xdescent,
height=height + ydescent)
self.update_prop(p, orig_handle, legend)
p.set_transform(trans)
return [p]
h = CS.collections
l = [f'{a:.1f}'for a in CS.levels]
l = [l[i] + (" : " + str(labels[i])) for i in range(len(CS.levels))]
l.append(r'Minimum χ$^2$')
proxy = [plt.Rectangle((0,0),1,1,color = colors[i]) for i in range(len(colors)-1)]
proxy.append(mpatches.Circle((0.5, 0.5), radius = 0.25, facecolor='#FFA500', edgecolor="#FFA500"))
plt.legend(proxy, l, handler_map={mpatches.Circle: HandlerEllipse()}).get_frame().set_facecolor('#FFFFFF')
plt.xlabel(r'$sin^2(2\theta_{\mu e})=sin^2(\theta_{24})sin^2(2\theta_{14})$',fontsize=20)
plt.ylabel(r'$\Delta$ $m_{14}^2$',fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.xlim([10**(-4), 0.1])
plt.ylim([10**(-3), 10**2])
plt.yscale('log')
plt.xscale('log')
plt.tight_layout()
plt.savefig('Final contour plot 68%!.png', dpi=300)
plt.show()
0.018367713135213994 49.99477874779713 0.002617074493875873
5*49.984
5*0.0177